# Working with geographic data in dataframes
import geopandas as gpd
# Working with dataframes
import pandas as pd
# Visualizing data
import matplotlib.pyplot as plt
# Leaflet wrapper for interactive mapping
import folium
# Ignores future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Shapefile to load
file = 'data/CENSUS_TRACTS_2010/CENSUS_TRACTS_2010.shp'
# Read the shapefile into a GeoDataFrame
la_gdf = gpd.read_file(file)
# Preview the first three rows
la_gdf.head(3)
# Table dimensions as (rows, columns)
la_gdf.shape
# Data type of each column
la_gdf.dtypes
# Field Descriptions
# Print the coordinate reference system (CRS) attached to the loaded GeoDataFrame
print(la_gdf.crs)
# Documentation: https://spatialreference.org/ref/epsg/2229/
# Reproject to an equal-area CRS before measuring area.
# EPSG:3857 (Web Mercator) is NOT equal-area: it inflates areas by roughly
# 1 / cos(latitude)^2, i.e. about 45% too large at Los Angeles' latitude.
# EPSG:3310 (NAD83 / California Albers) is equal-area and uses metres.
la_gdf = la_gdf.to_crs(epsg=3310)
print(la_gdf.crs)
# Number of square metres in one square kilometre
sqm_to_sqkm = 10**6
# Area of each geometry in square kilometres (CRS units are metres)
la_gdf['area'] = la_gdf.geometry.area / sqm_to_sqkm
# Inspect updated geodataframe
la_gdf.head(3)
# Reproject to EPSG:4326 (latitude/longitude) for the mapping steps
la_gdf = la_gdf.to_crs(epsg=4326)
print(la_gdf.crs)
# Census tract labels of the two islands to exclude
san_clemente_island = '5991.00'
santa_catalina_island = '5990.00'
# Keep every row whose LABEL is not one of the island labels.
# isin() compares whole labels literally; str.contains() would interpret the
# label as a regular expression ('.' matches any character) and would also
# match it as a substring of longer labels.
island_labels = [san_clemente_island, santa_catalina_island]
la_gdf = la_gdf[~la_gdf.LABEL.isin(island_labels)]
# Confirm islands were removed by checking dataframe shape
la_gdf.shape
# One figure with a single axes, 10 x 20 inches
fig, ax = plt.subplots(figsize=(10, 20))
# Draw the geometries of la_gdf onto that axes and show the figure
la_gdf.plot(ax=ax)
plt.show()
# Shapefile that carries the median household income field
new_file = "data/CENSUS_TRACTS_2010_MEDHINC/CENSUS_TRACTS_2010_MEDHINC.shp"
la_median_income_gdf = gpd.read_file(new_file)
# Preview the first three rows
la_median_income_gdf.head(3)
# Reproject to EPSG:4326, which expresses coordinates as latitude and longitude
la_median_income_gdf = la_median_income_gdf.to_crs(epsg=4326)
# Keep only the two join keys and the income column
columns = ['CT10', 'LABEL', 'MEDHINC']
la_median_gdf = la_median_income_gdf[columns]
# Left join on both keys so every row of la_gdf is retained
joined_gpdf = la_gdf.merge(la_median_gdf, how='left', on=['CT10', 'LABEL'])
# Preview the merged result
joined_gpdf.head(3)
# Create one figure with a single axes, 12 x 10 inches
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 10))
# Choropleth: fill colour is driven by the MEDHINC column using the continuous
# 'Greens' colour map, with black polygon outlines.
# legend takes a boolean; the string 'True' only worked because any non-empty
# string is truthy.
joined_gpdf.plot(
    column='MEDHINC',
    edgecolor='black',
    cmap='Greens',
    legend=True,
    ax=ax,
)
# Let the axes fill the figure rather than enforcing equal x/y data scaling
ax.set_aspect('auto')
plt.show()
# Center point for Los Angeles as [latitude, longitude], and the base map
los_angeles = [34.2822, -118.2437]
m = folium.Map(location=los_angeles, zoom_start=9)
# Define a choropleth layer for the map.
# Map.choropleth() is deprecated (and absent from newer folium releases);
# the supported API is the folium.Choropleth class added to the map.
folium.Choropleth(
    geo_data=joined_gpdf,
    name='geometry',
    data=joined_gpdf,
    # First column is the key matched against key_on, second is the value to shade by
    columns=['LABEL', 'MEDHINC'],
    key_on='feature.properties.LABEL',
    fill_color='YlGn',
    fill_opacity=0.75,
    line_opacity=0.5,
    legend_name='Median Household Income by Census Tract',
).add_to(m)
# Add layer control and display
folium.LayerControl().add_to(m)
display(m)